import gym
import time
import matplotlib.pyplot as plt
from matplotlib import animation

# Gym environment id passed to gym.make() by both display() and display_as_gif().
env_name = "Acrobot-v1"


def display():
    """Run one short random-action episode in an on-screen window.

    Creates the environment named by the module-level ``env_name`` with
    ``render_mode="human"``, samples random actions for at most 100 steps
    (or until the episode ends), prints every transition, and finally prints
    the accumulated reward.

    The environment is always closed on exit, even if stepping raises.
    """
    env = gym.make(env_name, render_mode="human")
    score = 0
    step = 0
    done = False
    try:
        env.reset()
        while not done and step < 100:
            # No explicit env.render() here: with render_mode="human" the
            # environment draws itself on every reset()/step().
            action = env.action_space.sample()
            # step() returns (obs, reward, terminated, truncated, info);
            # the episode is over when either flag is set.
            observation, reward, terminated, truncated, _info = env.step(action)
            done = terminated or truncated
            score += reward
            step += 1
            print(f'observation: {observation},reward: {reward},step: {step}')
    finally:
        # Release the render window / backend resources.
        env.close()
    print(f'reward: {score}')

def display_as_gif(output_path='E:/chartGPT_app/Reinforcement_learning/m_acrobot.gif',
                   max_steps=2000):
    """Record one random-action episode and save it as an animated GIF.

    Creates the environment named by the module-level ``env_name`` with
    ``render_mode="rgb_array"``, collects one rendered frame per step while
    sampling random actions, prints the accumulated reward, and writes the
    frames to ``output_path`` through a matplotlib ``FuncAnimation``.

    Args:
        output_path: Destination file for the animation. Defaults to the
            location this script originally hard-coded.
        max_steps: Upper bound on the number of steps (and therefore frames)
            recorded. Must be at least 1.

    Raises:
        ValueError: If ``max_steps`` is less than 1, since there would be no
            frame to size the figure from.
    """
    if max_steps < 1:
        raise ValueError("max_steps must be at least 1")

    env = gym.make(env_name, render_mode="rgb_array")
    try:
        frames = []
        score = 0
        step = 0
        done = False
        env.reset()
        while not done and step < max_steps:
            # Capture the frame for the current state before acting on it.
            frames.append(env.render())
            action = env.action_space.sample()
            # step() returns (obs, reward, terminated, truncated, info);
            # stop on either flag so we never step a finished episode.
            _obs, reward, terminated, truncated, _info = env.step(action)
            done = terminated or truncated
            score += reward
            step += 1
        print(f'score: {score}')

        # Size the figure so that one frame pixel maps to one figure pixel
        # at 72 dpi.
        height, width = frames[0].shape[0], frames[0].shape[1]
        fig = plt.figure(figsize=(width / 72, height / 72), dpi=72)
        try:
            patch = plt.imshow(frames[0])
            plt.axis('off')

            def animate(i):
                patch.set_data(frames[i])

            anim = animation.FuncAnimation(
                fig, animate, frames=len(frames), interval=50
            )
            anim.save(output_path)
        finally:
            # Do not leave the figure registered with pyplot after saving.
            plt.close(fig)
    finally:
        # Always release the environment, even if rendering or saving fails.
        env.close()

# Script entry point: records the GIF as soon as this file is executed.
# NOTE(review): this also runs when the file is imported as a module, since
# the call is not behind an `if __name__ == "__main__":` guard.
display_as_gif()
    



